home *** CD-ROM | disk | FTP | other *** search
-
- /* dgg -- for use as Mac child-app */
- #define MACINTOSH 1
-
- /* CONTIG ASSEMBLY PROGRAM (CAP)
-
- copyright (c) 1991 Xiaoqiu Huang
- The distribution of the program is granted provided no charge
- is made and the copyright notice is included.
-
- Proper attribution of the author as the source of the software
- would be appreciated:
- "A Contig Assembly Program Based on Sensitive Detection of
- Fragment Overlaps" (submitted to Genomics, 1991)
- Xiaoqiu Huang
- Department of Computer Science
- Michigan Technological University
- Houghton, MI 49931
- E-mail: huang@cs.mtu.edu
-
- The CAP program uses a dynamic programming algorithm to compute
- the maximal-scoring overlapping alignment between two fragments.
- Fragments in random orientations are assembled into contigs by a
- greedy approach in order of the overlap scores. CAP is efficient
- in computer memory: a large number of arbitrarily long fragments
- can be assembled. The time requirement is acceptable; for example,
- CAP took 4 hours to assemble 1015 fragments of a total of 252 kb
- nucleotides on a Sun SPARCstation SLC. The program is written in C
- and runs on Sun workstations.
-
- Below is a description of the parameters in the #define section of CAP.
- Two specially chosen sets of substitution scores and indel penalties
- are used by the dynamic programming algorithm: heavy set for regions
- of low sequencing error rates and light set for fragment ends of high
- sequencing error rates. (Use integers only.)
- Heavy set: Light set:
-
- MATCH = 2 MATCH = 2
- MISMAT = -6 LTMISM = -3
- EXTEND = 4 LTEXTEN = 2
-
- In the initial assembly, any overlap must be of length at least OVERLEN,
- and any overlap/containment must be of identity percentage at least
- PERCENT. After the initial assembly, the program attempts to join
- contigs together using weak overlaps. Two contigs are merged if the
- score of the overlapping alignment is at least CUTOFF. The value for
- CUTOFF is chosen according to the value for MATCH.
-
- DELTA is a parameter in necessary conditions for overlap/containment.
- Those conditions are used to quickly reject pairs of fragments that
- could not possibly have an overlap/containment relationship.
- The dynamic programming algorithm is only applied to pairs of fragments
- that pass the screening. A large value for DELTA means stringent
- conditions, where the value for DELTA is a real number at least 8.0.
-
- POS5 and POS3 are fragment positions such that the 5' end between base 1
- and base POS5, and the 3' end after base POS3 are of high sequencing
- error rates, say more than 5%. For mismatches and indels occurring in
- the two ends, light penalties are used.
-
- A file of input fragments looks like:
- >G019uabh
- ATACATCATAACACTACTTCCTACCCATAAGCTCCTTTTAACTTGTTAAA
- GTCTTGCTTGAATTAAAGACTTGTTTAAACACAAAAATTTAGAGTTTTAC
- TCAACAAAAGTGATTGATTGATTGATTGATTGATTGATGGTTTACAGTAG
- GACTTCATTCTAGTCATTATAGCTGCTGGCAGTATAACTGGCCAGCCTTT
- AATACATTGCTGCTTAGAGTCAAAGCATGTACTTAGAGTTGGTATGATTT
- ATCTTTTTGGTCTTCTATAGCCTCCTTCCCCATCCCCATCAGTCTTAATC
- AGTCTTGTTACGTTATGACTAATCTTTGGGGATTGTGCAGAATGTTATTT
- TAGATAAGCAAAACGAGCAAAATGGGGAGTTACTTATATTTCTTTAAAGC
- >G028uaah
- CATAAGCTCCTTTTAACTTGTTAAAGTCTTGCTTGAATTAAAGACTTGTT
- TAAACACAAAATTTAGACTTTTACTCAACAAAAGTGATTGATTGATTGAT
- TGATTGATTGATGGTTTACAGTAGGACTTCATTCTAGTCATTATAGCTGC
- TGGCAGTATAACTGGCCAGCCTTTAATACATTGCTGCTTAGAGTCAAAGC
- ATGTACTTAGAGTTGGTATGATTTATCTTTTTGGTCTTCTATAGCCTCCT
- TCCCCATCCCATCAGTCT
- >G022uabh
- TATTTTAGAGACCCAAGTTTTTGACCTTTTCCATGTTTACATCAATCCTG
- TAGGTGATTGGGCAGCCATTTAAGTATTATTATAGACATTTTCACTATCC
- CATTAAAACCCTTTATGCCCATACATCATAACACTACTTCCTACCCATAA
- GCTCCTTTTAACTTGTTAAAGTCTTGCTTGAATTAAAGACTTGTTTAAAC
- ACAAAATTTAGACTTTTACTCAACAAAAGTGATTGATTGATTGATTGATT
- GATTGAT
- >G023uabh
- AATAAATACCAAAAAAATAGTATATCTACATAGAATTTCACATAAAATAA
- ACTGTTTTCTATGTGAAAATTAACCTAAAAATATGCTTTGCTTATGTTTA
- AGATGTCATGCTTTTTATCAGTTGAGGAGTTCAGCTTAATAATCCTCTAC
- GATCTTAAACAAATAGGAAAAAAACTAAAAGTAGAAAATGGAAATAAAAT
- GTCAAAGCATTTCTACCACTCAGAATTGATCTTATAACATGAAATGCTTT
- TTAAAAGAAAATATTAAAGTTAAACTCCCCTATTTTGCTCGTTTTTGCTT
- ATCTAAAATACATTCTGCACAATCCCCAAAGATTGATCATACGTTAC
- >G006uaah
- ACATAAAATAAACTGTTTTCTATGTGAAAATTAACCTANNATATGCTTTG
- CTTATGTTTAAGATGTCATGCTTTTTATCAGTTGAGGAGTTCAGCTTAAT
- AATCCTCTAAGATCTTAAACAAATAGGAAAAAAACTAAAAGTAGAAAATG
- GAAATAAAATGTCAAAGCATTTCTACCACTCAGAATTGATCTTATAACAT
- GAAATGCTTTTTAAAAGAAAATATTAAAGTTAAACTCCCC
- A string after ">" is the name of the following fragment.
- Only the five upper-case letters A, C, G, T and N are allowed
- to appear in fragment data. No other characters are allowed.
- A common mistake is the use of lower case letters in a fragment.
-
- To run the program, type a command of form
-
- cap file_of_fragments
-
- The output goes to the terminal screen. So redirection of the
- output into a file is necessary. The output consists of three parts:
- overview of contigs at fragment level, detailed display of contigs
- at nucleotide level, and consensus sequences.
- '+' = direct orientation; '-' = reverse complement
- The output of CAP on the sample input data looks like:
-
- #Contig 1
-
- #G022uabh+(0)
- TATTTTAGAGACCCAAGTTTTTGACCTTTTCCATGTTTACATCAATCCTGTAGGTGATTG
- GGCAGCCATTTAAGTATTATTATAGACATTTTCACTATCCCATTAAAACCCTTTATGCCC
- ATACATCATAACACTACTTCCTACCCATAAGCTCCTTTTAACTTGTTAAAGTCTTGCTTG
- AATTAAAGACTTGTTTAAACACAAAA-TTTAGACTTTTACTCAACAAAAGTGATTGATTG
- ATTGATTGATTGATTGAT
- #G028uaah+(145)
- CATAAGCTCCTTTTAACTTGTTAAAGTCTTGCTTGAATTAAAGACTTGTTTAAACACAAA
- A-TTTAGACTTTTACTCAACAAAAGTGATTGATTGATTGATTGATTGATTGATGGTTTAC
- AGTAGGACTTCATTCTAGTCATTATAGCTGCTGGCAGTATAACTGGCCAGCCTTTAATAC
- ATTGCTGCTTAGAGTCAAAGCATGTACTTAGAGTTGGTATGATTTATCTTTTTGGTCTTC
- TATAGCCTCCTTCCCCATCCC-ATCAGTCT
- #G019uabh+(120)
- ATACATCATAACACTACTTCCTACCCATAAGCTCCTTTTAACTTGTTAAAGTCTTGCTTG
- AATTAAAGACTTGTTTAAACACAAAAATTTAGAGTTTTACTCAACAAAAGTGATTGATTG
- ATTGATTGATTGATTGATGGTTTACAGTAGGACTTCATTCTAGTCATTATAGCTGCTGGC
- AGTATAACTGGCCAGCCTTTAATACATTGCTGCTTAGAGTCAAAGCATGTACTTAGAGTT
- GGTATGATTTATCTTTTTGGTCTTCTATAGCCTCCTTCCCCATCCCCATCAGTCTTAATC
- AGTCTTGTTACGTTATGACT-AATCTTTGGGGATTGTGCAGAATGTTATTTTAGATAAGC
- AAAA-CGAGCAAAAT-GGGGAGTT-A-CTT-A-TATTT-CTTT-AAA--GC
- #G023uabh-(426)
- GTAACGT-ATGA-TCAATCTTTGGGGATTGTGCAGAATGT-ATTTTAGATAAGCAAAAAC
- GAGCAAAATAGGGGAGTTTAACTTTAATATTTTCTTTTAAAAAGCATTTCATGTTATAAG
- ATCAATTCTGAGTGGTAGAAATGCTTTGACATTTTATTTCCATTTTCTACTTTTAGTTTT
- TTTCCTATTTGTTTAAGATCGTAGAGGATTATTAAGCTGAACTCCTCAACTGATAAAAAG
- CATGACATCTTAAACATAAGCAAAGCATATTTTTAGGTTAATTTTCACATAGAAAACAGT
- TTATTTTATGTGAAATTCTATGTAGATATACTATTTTTTTGGTATTTATT
- #G006uaah-(496)
- GGGGAGTTTAACTTTAATATTTTCTTTTAAAAAGCATTTCATGTTATAAGATCAATTCTG
- AGTGGTAGAAATGCTTTGACATTTTATTTCCATTTTCTACTTTTAGTTTTTTTCCTATTT
- GTTTAAGATCTTAGAGGATTATTAAGCTGAACTCCTCAACTGATAAAAAGCATGACATCT
- TAAACATAAGCAAAGCATATNNT-AGGTTAATTTTCACATAGAAAACAGTTTATTTTATG
- T
-
-
-
- Slight modifications by S. Smith on Mon Feb 17 10:18:34 EST 1992.
- These changes allow for command line arguements for several
- of the hard coded parameters, as well as a slight modification to
- the output routine to support GDE format. Changes are commented
- as:
-
- Mod by S.S.
-
- */
-
-
- #pragma segment Cap2
-
- #include <stdio.h>
- #include <stdlib.h>
-
-
- int OVERLEN; /* Minimum length of any overlap */
- float PERCENT; /* Minimum identity percentage of any overlap */
- #define CUTOFF 50 /* cutoff score for overlap or containment */
- #define DELTA 9.0 /* Jump increment in check for overlap */
- #define MATCH 2 /* score of a match */
- #define MISMAT -6 /* score of a mismatch */
- #define LTMISM -3 /* light score of a mismatch */
- #define OPEN 0 /* gap open penalty */
- #define EXTEND 4 /* gap extension penalty */
- #define LTEXTEN 2 /* light gap extension penalty */
- #define POS5 30 /* Sequencing errors often occur before base POS5 */
- #define POS3 475 /* Sequencing errors often occur after base POS3 */
- #define LINELEN 60 /* length of one printed line */
- #define NAMELEN 20 /* length of printed fragment name */
- #define TUPLELEN 4 /* Maximum length of words for lookup table */
- #define SEQLEN 2000 /* initial size of an array for an output fragment */
-
- static int over_len;
- static float per_cent;
- typedef struct OVERLAP /* of 5' and 3' segments */
- {
- int number; /* index of 3' segment */
- int host; /* index of 5' segment */
- int ind; /* used in reassembly */
- int stari; /* start position of 5' suffix */
- int endi; /* end position of 5' suffix */
- int starj; /* start position of 3' prefix */
- int endj; /* end position of 3' prefix */
- short orienti; /* orientation of 5' segment: 0= rev. */
- short orientj; /* orientation of 3' segment: 0= rev. */
- int score; /* score of overlap alignment */
- int length; /* length of alignment */
- int match; /* number of matches in alignment */
- short kind; /* 0 = containment; 1 = overlap */
- int *script; /* script for constructing alignment */
- struct OVERLAP *next;
- } over, *overptr;
- struct SEG
- {
- char *name; /* name string */
- int len; /* length of segment name */
- char *seq; /* segment sequence */
- char *rev; /* reverse complement */
- int length; /* length of sequence */
- short kind; /* 0 = contain; 1 = overlap */
- int *lookup; /* lookup table */
- int *pos; /* location list */
- overptr list; /* list of overlapping edges */
- } *segment; /* array of segment records */
- int seg_num; /* The number of segments */
- overptr *edge; /* set of overlapping edges */
- int edge_num; /* The number of overlaps */
- struct CONS /* 1 = itself; 0 = reverse complement */
- {
- short isfive[2]; /* is 5' end free? */
- short isthree[2]; /* is 3' end free? */
- short orient[2]; /* orientation of 3' segment */
- int group; /* contig serial number */
- int next[2]; /* pointer to 3' adjacent segment */
- int other; /* the other end of the contig */
- int child; /* for the containment tree */
- int brother;
- int father;
- overptr node[2]; /* pointers to overlapping edges */
- } *contigs; /* list of contigs */
- struct TTREE /* multiple alignment tree */
- {
- short head; /* head = 1 means the head of a contig */
- short orient; /* orientation */
- int begin; /* start position of previous segment */
- int *script; /* alignment script */
- int size; /* length of script */
- int next; /* list of overlap segments */
- int child; /* list of child segments */
- int brother; /* list of sibling segments */
- } *mtree;
- int vert[128]; /* converted digits for 'A','C','G','T' */
- int vertc[128]; /* for reverse complement */
- int tuple; /* tuple = TUPLELEN - 1 */
- int base[TUPLELEN]; /* locations of a lookup table */
- int power[TUPLELEN]; /* powers of 4 */
- typedef struct OUT
- {
- char *line; /* one print line */
- char *a; /* pointer to slot in line */
- char c; /* current char */
- char *seq; /* pointer to sequence */
- int length; /* length of segment */
- int id; /* index of segment */
- int *s; /* pointer to script vector */
- int size; /* size of script vector */
- int op; /* current operation */
- char name[NAMELEN+2];/* name of segment */
- short done; /* indicates if segment is done */
- int loc; /* position of next segment symbol */
- char kind; /* type of next symbol of segment */
- char up; /* type of upper symbol of operation */
- char dw; /* type of lower symbol of operation */
- int offset; /* relative to consensus sequence */
- int linesize; /* size of array line */
- struct OUT *child; /* pointer to child subtree */
- struct OUT *brother; /* pointer to brother node */
- struct OUT *link; /* for operation linked list */
- struct OUT *father; /* pointer to father node */
- } row, *rowptr; /* node for segment */
- rowptr *work; /* a set of working segments */
- rowptr head, tail; /* first and last nodes of op list */
- struct VX
- {
- int id; /* Segment index */
- short kind; /* overlap or containment */
- overptr list; /* list of overlapping edges */
- } *piece; /* array of segment records */
- char *allconsen, *allconpt; /* Storing consensus sequences */
-
- /* dgg patches for mac use */
-
- char
- nuccomp( symbol) char symbol;
- {
- switch ( symbol ) {
- case 'A' : return 'T';
- case 'a' : return 't';
- case 'T' : return 'A';
- case 't' : return 'a';
- case 'C' : return 'G';
- case 'c' : return 'g';
- case 'G' : return 'C';
- case 'g' : return 'c';
- default : return symbol;
- }
- }
-
- char *ckalloc(int amount); /* for borland c */
-
-
- typedef char cvec[128];
-
- FILE* fout; /* output file */
-
-
- #ifdef MACINTOSH
- // use with ChildApp.c
-
- int RealMain( int argc, char *argv[])
-
- #else
-
- int main(argc, argv)
- int argc; char *argv[];
- #endif
- {
- int M; /* Sequence length */
- /* dgg -- make smaller arrays for mac */
- /* int V[128][128], Q,R;
- int V1[128][128], R1;
- */
- cvec *V, *V1;
- short Q,R, R1;
- int total; /* Total of segment lengths */
- int number; /* The number of segments */
- char *sequence; /* Storing all segments */
- char *reverse; /* Storing all reverse segments */
- int symbol, prev; /* The next character */
- FILE *Ap, *ckopen(); /* For the sequence file */
- char *ckalloc(); /* space-allocating function */
- register int i, j, k; /* index variables */
- /* Mod by S.S. */
- int jj;
- short heading; /* 1: heading; 0: sequence */
-
- /*
- * Mod by S.S. & dgg
- *
- if ( argc != 2 )
- fatalf("The proper form of command is: \n%s file_of_fragments",
- argv[0]);
- */
- if ( argc < 5 ) {
- short i;
- for (i= 0; i<argc; i++) fprintf(stderr, "arg[%d]=%s \n", i, argv[i]);
- fatalf("usage: %s file_of_fragments output_file MIN_OVERLAP PERCENT_MATCH",
- argv[0]);
- }
- sscanf(argv[3],"%d",&OVERLEN);
- sscanf(argv[4],"%d",&jj);
- PERCENT = (float)jj/100.0;
- if(PERCENT < 0.25) PERCENT = 0.25;
- if(PERCENT > 1.0) PERCENT = 1.0;
- if(OVERLEN < 1) OVERLEN = 1;
- if(OVERLEN > 100) OVERLEN = 100;
-
- if (argv[2] == "stdout" || argv[2] == "-") fout= NULL;
- else fout = fopen( argv[2], "w");
- if (!fout) fout = stdout;
-
- /* determine number of segments and total lengths */
- j = 0;
-
- Ap = ckopen(argv[1], "r");
- prev = '\n';
- for (total = 3, number = 0; ( symbol = fgetc(Ap)) != EOF ; total++ )
- {
- if ( symbol == '>' && prev == '\n' )
- number++;
- prev = symbol;
- }
- (void) fclose(Ap);
-
- /* dgg - allocate space for arrays */
- /* for (i=0; i<128; i++) V[i]= ( char *) ckalloc( 128 * sizeof(char)); NO NO */
- V= ( cvec *) ckalloc( 128 * 128 * sizeof(char)); /* !! yes */
- V1= ( cvec *) ckalloc( 128 * 128 * sizeof(char));
-
- total += number * 20;
- /* allocate space for segments */
- sequence = ( char * ) ckalloc( total * sizeof(char));
- reverse = ( char * ) ckalloc( total * sizeof(char));
- allconpt = allconsen = ( char * ) ckalloc( total * sizeof(char));
- segment = ( struct SEG * ) ckalloc( number * sizeof(struct SEG));
-
- /* read the segments into sequence */
- M = 0;
- Ap = ckopen(argv[1], "r");
- number = -1;
- heading = 0;
- prev = '\n';
- for ( i = 0, k = total ; ( symbol = fgetc(Ap)) != EOF ; )
- {
- if ( symbol != '\n' )
- {
- sequence[++i] = symbol;
- /* reverse[--k] = nuccomp( symbol); */
- switch ( symbol )
- {
- case 'A' :
- reverse[--k] = 'T';
- break;
- case 'a' :
- reverse[--k] = 't';
- break;
- case 'T' :
- reverse[--k] = 'A';
- break;
- case 't' :
- reverse[--k] = 'a';
- break;
- case 'C' :
- reverse[--k] = 'G';
- break;
- case 'c' :
- reverse[--k] = 'g';
- break;
- case 'G' :
- reverse[--k] = 'C';
- break;
- case 'g' :
- reverse[--k] = 'c';
- break;
- default :
- reverse[--k] = symbol;
- break;
- }
- }
- if ( symbol == '>' && prev == '\n' )
- {
- heading = 1;
- if ( number >= 0 )
- {
- segment[number].length = i - j - 1;
- segment[number].rev = &(reverse[k]);
- if ( i - j - 1 > M ) M = i - j -1;
- }
- number++;
- j = i;
- segment[number].name = &(sequence[i]);
- segment[number].kind = 1;
- segment[number].list = 0;
- }
- if ( heading && symbol == '\n' )
- {
- heading = 0;
- segment[number].len = i - j;
- segment[number].seq = &(sequence[i]);
- j = i;
- }
-
- prev = symbol;
- }
- segment[number].length = i - j;
- reverse[--k] = '>';
- segment[number].rev = &(reverse[k]);
- if ( i - j > M ) M = i - j;
- seg_num = ++number;
- (void) fclose(Ap);
-
- Q = OPEN;
- R = EXTEND;
- R1 = LTEXTEN;
- /* set match and mismatch weights */
- for ( i = 0; i < 128 ; i++ )
- for ( j = 0; j < 128 ; j++ )
- if ((i|32) == (j|32) )
- V[i][j] = V1[i][j] = MATCH;
- else
- {
- V[i][j] = MISMAT;
- V1[i][j] = LTMISM;
- }
- for ( i = 0; i < 128 ; i++ )
- V['N'][i] = V[i]['N'] = MISMAT + 1;
- V1['N']['N'] = MISMAT + 1;
-
- over_len = OVERLEN;
- per_cent = PERCENT;
- edge_num = 0;
- fprintf(stderr,"\nINIT"); INIT(M);
- fprintf(stderr,"\nMAKE"); MAKE();
- fprintf(stderr,"\nPAIR"); PAIR(V,V1,Q,R,R1);
- fprintf(stderr,"\nASSEM"); ASSEM();
- fprintf(stderr,"\nREPAIR"); REPAIR();
- fprintf(stderr,"\nFORM_TREE"); FORM_TREE();
- /* GRAPH(); */
- fprintf(stderr,"\nSHOW\n"); SHOW();
- fclose(fout); /* dgg */
- }
-
- static char (*varray)[128]; /* substitution scores */
- static int q, r; /* gap penalties */
- static int qr; /* qr = q + r */
- static char (*v1)[128]; /* light substitution scores */
- static int r1; /* light extension penalty */
- static int qr1; /* qr1 = q + r1 */
-
- static int SCORE;
- static int STARI;
- static int STARJ;
- static int ENDI;
- static int ENDJ;
-
- static int *CC, *DD; /* saving matrix scores */
- static int *RR, *SS; /* saving start-points */
- static int *S; /* saving operations for diff */
-
- /* The following definitions are for function diff() */
-
- int diff();
- static int zero = 0; /* int type zero */
-
- #define gap(k) ((k) <= 0 ? 0 : q+r*(k)) /* k-symbol indel score */
-
- static int *sapp; /* Current script append ptr */
- static int last; /* Last script op appended */
-
- static int no_mat; /* number of matches */
-
- static int no_mis; /* number of mismatches */
-
- static int al_len; /* length of alignment */
- /* Append "Delete k" op */
- #define DEL(k) \
- { al_len += k; \
- if (last < 0) \
- last = sapp[-1] -= (k); \
- else \
- last = *sapp++ = -(k); \
- }
- /* Append "Insert k" op */
- #define INS(k) \
- { al_len += k; \
- if (last < 0) \
- { sapp[-1] = (k); *sapp++ = last; } \
- else \
- last = *sapp++ = (k); \
- }
-
- /* Append "Replace" op */
- #define REP \
- { last = *sapp++ = 0; \
- al_len += 1; \
- }
-
- INIT(M) int M;
- {
- register int j; /* row and column indices */
- char *ckalloc(); /* space-allocating function */
-
- /* allocate space for all vectors */
- j = (M + 1) * sizeof(int );
- CC = ( int * ) ckalloc(j);
- DD = ( int * ) ckalloc(j);
- RR = ( int * ) ckalloc(j);
- SS = ( int * ) ckalloc(j);
- S = ( int * ) ckalloc(2 * j);
- }
-
- /* Make a lookup table for words of lengths up to TUPLELEN in each sequence.
- The value of a word is used as an index to the table. */
- MAKE()
- {
- int hash[TUPLELEN]; /* values of words of lengths up to TUPLELEN */
- int *table; /* pointer to a lookup table */
- int *loc; /* pointer to a table of sequence locations */
- int size; /* size of a lookup table */
- int limit, digit, step; /* temporary variables */
- register int i, j, k, p, q; /* index varibles */
- char *ckalloc(); /* space-allocating function */
- char *A; /* pointer to a sequence */
- int M; /* length of a sequence */
-
- tuple = TUPLELEN - 1;
- for ( i = 0; i < 128; i++ )
- vert[i] = 4;
- vert['A'] = vert['a'] = 0;
- vert['C'] = vert['c'] = 1;
- vert['G'] = vert['g'] = 2;
- vert['T'] = vert['t'] = 3;
- vertc['A'] = vertc['a'] = 3;
- vertc['C'] = vertc['c'] = 2;
- vertc['G'] = vertc['g'] = 1;
- vertc['T'] = vertc['t'] = 0;
- for ( i = 0, j = 1, size = 0; i <= tuple ; i++, j *= 4 )
- {
- base[i] = size;
- power[i] = j;
- size = ( size + 1 ) * 4;
- }
- for ( j = 0; j <= tuple; j++ )
- hash[j] = 0;
- /* make a lookup table for each sequence */
- for ( i = 0; i < seg_num ; i++ )
- {
- A = segment[i].seq;
- M = segment[i].length;
- table = segment[i].lookup = (int * ) ckalloc(size * sizeof(int ));
- loc = segment[i].pos = (int * ) ckalloc((M + 1) * sizeof(int ));
- for ( j = 0; j < size; j++ )
- table[j] = 0;
- for ( k = 0, j = 1; j <= M; j++ )
- if ( ( digit = vert[A[j]] ) != 4 )
- {
- for ( p = tuple; p > 0; p-- )
- hash[p] = 4 * (hash[p-1] + 1) + digit;
- hash[0] = digit;
- step = j - k;
- limit = tuple <= step ? tuple : step;
- for ( p = 0; p < limit; p++ )
- if ( ! table[(q = hash[p])] ) table[q] = 1;
- if ( step > tuple )
- {
- loc[(p = j - tuple)] = table[(q = hash[tuple])];
- table[q] = p;
- }
- }
- else
- k = j;
- }
- }
-
- /* Perform pair-wise comparisons of sequences. The sequences not
- satisfying the necessary condition for overlap are rejected quickly.
- Those that do satisfy the condition are verified with a dynamic
- programming algorithm to see if overlaps exist. */
- PAIR(V,V1,Q,R,R1) char V[][128],V1[][128],Q,R,R1;
- {
- int endi, endj, stari, starj; /* endpoint and startpoint */
-
- short orienti, orientj; /* orientation of segments */
- short iscon; /* containment condition */
- int score; /* the max score */
- int count, limit; /* temporary variables */
-
- register int i, j, d; /* row and column indices */
- char *ckalloc(); /* space-allocating function */
- int rl, cl;
- char *A, *B;
- int M, N;
- overptr node1;
- int total; /* total number of pairs */
- int hit; /* number of pairs satisfying cond. */
- int CHECK();
-
- varray = V;
- q = Q;
- r = R;
- qr = q + r;
- v1 = V1;
- r1 = R1;
- qr1 = q + r1;
- total = hit = 0;
- limit = 2 * ( seg_num - 1 );
- for ( orienti = 0, d = 0; d < limit ; d++ )
- {
- i = d / 2;
- orienti = 1 - orienti;
- A = orienti ? segment[i].seq : segment[i].rev;
- M = segment[i].length;
- for ( j = i+1; j < seg_num ; j++ )
- {
- B = segment[j].seq;
- orientj = 1;
- N = segment[j].length;
- total += 1;
- if ( CHECK(orienti, i, j) )
- {
- hit += 1;
- SCORE = 0;
- big_pass(A,B,M,N,orienti,orientj);
- if ( SCORE )
- {
- score = SCORE;
- stari = ++STARI;
- starj = ++STARJ;
- endi = ENDI;
- endj = ENDJ;
- rl = endi - stari + 1;
- cl = endj - starj + 1;
- sapp = S;
- last = 0;
- al_len = no_mat = no_mis = 0;
- (void) diff(&A[stari]-1, &B[starj]-1,rl,cl,q,q);
- iscon = stari == 1 && endi == M || starj == 1 && endj == N;
- if ( no_mat >= al_len * per_cent &&
- ( al_len >= over_len || iscon ) )
- {
- node1 = ( overptr ) ckalloc( (int ) sizeof(over));
- if ( iscon )
- node1->kind = 0; /* containment */
- else
- {
- node1->kind = 1;
- edge_num++;
- } /* overlap */
- if ( endi == M && ( endj != N || starj != 1 ) ) /*i is 5'*/
- {
- node1->number = j;
- node1->host = i;
- node1->stari = stari;
- node1->endi = endi;
- node1->orienti = orienti;
- node1->starj = starj;
- node1->endj = endj;
- node1->orientj = orientj;
- }
- else /* j is 5' */
- {
- node1->number = i;
- node1->host = j;
- node1->stari = starj;
- node1->endi = endj;
- node1->orienti = orientj;
- node1->starj = stari;
- node1->endj = endi;
- node1->orientj = orienti;
- }
- node1->score = score;
- node1->length = al_len;
- node1->match = no_mat;
- count = node1->number == i ? j : i;
- node1->next = segment[count].list;
- segment[count].list = node1;
- if ( ! node1->kind )
- segment[count].kind = 0;
- }
- }
- }
- }
- }
- }
-
- /* Return 1 if two sequences satisfy the necessary condition for overlap,
- and 0 otherwise. Parameters first and second are the indices of segments,
- and parameter orient indicates the orientation of segment first. */
- int CHECK(orient,first,second) short orient;
- int first, second;
- {
- int limit, bound; /* maximum number of jumps */
- int small; /* smaller of limit and bound */
- float delta; /* cutoff factor */
- float cut; /* cutoff score */
- register int i; /* index variable */
- int t, q; /* temporary variables */
- int JUMP();
- int RJUMP();
- int JUMPC();
- int RJUMPC();
-
- delta = DELTA;
- if ( orient )
- limit = JUMP(CC, first, second, 1);
- else
- limit = JUMPC(CC, first, second);
- bound = RJUMP(DD, second, first, orient);
- small = limit <= bound ? limit : bound;
- cut = 0;
- for ( i = 1; i <= small; i++ )
- {
- if ( (t = DD[i] - 1) >= over_len && t >= cut
- && (q = CC[i] - 1) >= over_len && q >= cut )
- return (1);
- cut += delta;
- }
- if (DD[bound] >= delta*(bound-1) || CC[limit] >= delta*(limit-1))
- return (1);
- limit = JUMP(CC, second, first, orient);
- if ( orient )
- bound = RJUMP(DD, first, second, 1);
- else
- bound = RJUMPC(DD, first, second);
- small = limit <= bound ? limit : bound;
- cut = 0;
- for ( i = 1; i <= small; i++ )
- {
- if ( (t = DD[i] - 1) >= over_len && t >= cut
- && (q = CC[i] - 1) >= over_len && q >= cut )
- return (1);
- cut += delta;
- }
- return (0);
- }
-
- /* Compute a vector of lengths of jumps */
- int JUMP(H,one,two,orient) int H[], one, two;
- short orient;
- {
- char *A, *B; /* pointers to two sequences */
- int M, N; /* lengths of two sequences */
- int *table; /* pointer to a lookup table */
- int *loc; /* pointer to a location table */
- int value; /* value of a word */
- int maxd; /* maximum length of an identical diagonal */
- int d; /* length of current identical diagonal */
- int s; /* length of jumps */
- int k; /* number of jumps */
-
- register int i, j, p; /* index variables */
-
- A = segment[one].seq;
- M = segment[one].length;
- table = segment[one].lookup;
- loc = segment[one].pos;
- B = orient ? segment[two].seq : segment[two].rev;
- N = segment[two].length;
- for ( s = 1, k = 1; s <= N ; k++ )
- {
- maxd = 0;
- for ( value = -1, d = 0, j = s; d <= tuple && j <= N; j++, d++)
- {
- if ( vert[B[j]] == 4 ) break;
- value = 4 * (value + 1) + vert[B[j]];
- if ( ! table[value] ) break;
- }
- if ( d > tuple )
- {
- for ( p = table[value]; p ; p = loc[p] )
- {
- d = tuple + 1;
- for ( i = p+d, j = s+d; i <= M && j <= N; i++, j++, d++ )
- if ( A[i] != B[j] && vert[A[i]] != 4 && vert[B[j]] != 4 ) break;
- if ( maxd < d )
- maxd = d;
- if ( j > N ) break;
- }
- }
- else
- maxd = d;
- s += maxd + 1;
- H[k] = s;
- }
- return (k - 1);
- }
-
- /* Compute a vector of lengths of jumps for reverse complement of one */
- int JUMPC(H,one,two) int H[], one, two;
- {
- char *A, *B; /* pointers to two sequences */
- int M, N; /* lengths of two sequences */
- int *table; /* pointer to a lookup table */
- int *loc; /* pointer to a location table */
- int value; /* value of a word */
- int maxd; /* maximum length of an identical diagonal */
- int d; /* length of current identical diagonal */
- int s; /* length of jumps */
- int k; /* number of jumps */
-
- register int i, j, p; /* index variables */
-
- A = segment[one].rev;
- M = segment[one].length;
- table = segment[one].lookup;
- loc = segment[one].pos;
- B = segment[two].seq;
- N = segment[two].length;
- for ( s = 1, k = 1; s <= N ; k++ )
- {
- maxd = 0;
- for ( value = 0, d = 0, j = s; d <= tuple && j <= N; j++, d++)
- {
- if ( vert[B[j]] == 4 ) break;
- value += vertc[B[j]] * power[d];
- if ( ! table[value+base[d]] ) break;
- }
- if ( d > tuple )
- {
- for ( p = table[value+base[tuple]]; p ; p = loc[p] )
- {
- d = tuple + 1;
- for ( i = M+2-p, j = s+d; i <= M && j <= N; i++, j++, d++ )
- if ( A[i] != B[j] && vert[A[i]] != 4 && vert[B[j]] != 4 ) break;
- if ( maxd < d )
- maxd = d;
- if ( j > N ) break;
- }
- }
- else
- maxd = d;
- s += maxd + 1;
- H[k] = s;
- }
- return (k - 1);
- }
-
- /* Compute a vector of lengths of reverse jumps */
- int RJUMP(H,one,two,orient) int H[], one, two;
- short orient;
- {
- char *A, *B; /* pointers to two sequences */
- int N; /* length of a sequence */
- int *table; /* pointer to a lookup table */
- int *loc; /* pointer to a location table */
- int value; /* value of a word */
- int maxd; /* maximum length of an identical diagonal */
- int d; /* length of current identical diagonal */
- int s; /* length of jumps */
- int k; /* number of jumps */
-
- register int i, j, p; /* index variables */
-
- A = segment[one].seq;
- table = segment[one].lookup;
- loc = segment[one].pos;
- B = orient ? segment[two].seq : segment[two].rev;
- N = segment[two].length;
- for ( s = 1, k = 1; s <= N ; k++ )
- {
- maxd = 0;
- for (value = 0, d = 0, j = N+1-s; d <= tuple && j >= 1; j--, d++)
- {
- if ( vert[B[j]] == 4 ) break;
- value += vert[B[j]] * power[d];
- if ( ! table[value+base[d]] ) break;
- }
- if ( d > tuple )
- {
- for ( p = table[value+base[tuple]]; p ; p = loc[p] )
- {
- d = tuple + 1;
- for ( i = p-1, j = N+1-s-d; i >= 1 && j >= 1; i--, j--, d++ )
- if ( A[i] != B[j] && vert[A[i]] != 4 && vert[B[j]] != 4 ) break;
- if ( maxd < d )
- maxd = d;
- if ( j < 1 ) break;
- }
- }
- else
- maxd = d;
- s += maxd + 1;
- H[k] = s;
- }
- return (k - 1);
- }
-
- /* Compute a vector of lengths of reverse jumps for reverse complement */
- int RJUMPC(H,one,two) int H[], one, two;
- {
- char *A, *B; /* pointers to two sequences */
- int M, N; /* lengths of two sequences */
- int *table; /* pointer to a lookup table */
- int *loc; /* pointer to a location table */
- int value; /* value of a word */
- int maxd; /* maximum length of an identical diagonal */
- int d; /* length of current identical diagonal */
- int s; /* length of jumps */
- int k; /* number of jumps */
-
- register int i, j, p; /* index variables */
-
- A = segment[one].rev;
- M = segment[one].length;
- table = segment[one].lookup;
- loc = segment[one].pos;
- B = segment[two].seq;
- N = segment[two].length;
- for ( s = 1, k = 1; s <= N ; k++ )
- {
- maxd = 0;
- for (value = -1, d = 0, j = N+1-s; d <= tuple && j >= 1; j--, d++)
- {
- if ( vert[B[j]] == 4 ) break;
- value = 4 * (value + 1) + vertc[B[j]];
- if ( ! table[value] ) break;
- }
- if ( d > tuple )
- {
- for ( p = table[value]; p ; p = loc[p] )
- {
- d = tuple + 1;
- i = M - p - tuple;
- for ( j = N-s-tuple; i >= 1 && j >= 1; i--, j--, d++ )
- if ( A[i] != B[j] && vert[A[i]] != 4 && vert[B[j]] != 4 ) break;
- if ( maxd < d )
- maxd = d;
- if ( j < 1 ) break;
- }
- }
- else
- maxd = d;
- s += maxd + 1;
- H[k] = s;
- }
- return (k - 1);
- }
-
- /* Construct contigs */
- ASSEM()
- {
- char *ckalloc(); /* space-allocating function */
- register int i, j, k; /* index variables */
- overptr node1, x, y; /* temporary pointer */
- int five, three; /* indices of 5' and 3' segments */
- short orienti; /* orientation of 5' segment */
- short orientj; /* orientation of 3' segment */
- short sorted; /* boolean variable */
-
- contigs = ( struct CONS * ) ckalloc( seg_num * sizeof(struct CONS));
- for ( i = 0; i < seg_num; i++ )
- {
- contigs[i].isfive[0] = contigs[i].isthree[0] = 1;
- contigs[i].isfive[1] = contigs[i].isthree[1] = 1;
- contigs[i].other = i;
- contigs[i].group = contigs[i].child = -1;
- contigs[i].brother = contigs[i].father = -1;
- }
- for ( i = 0; i < seg_num; i++ )
- if ( ! segment[i].kind )
- for ( ; ; )
- {
- for ( y = segment[i].list; y->kind; y = y->next )
- ;
- for ( x = y->next; x != 0; x = x->next )
- if ( ! x->kind && x->score > y->score )
- y = x;
- for ( j = y->number; (k = contigs[j].father) != -1; j = k )
- ;
- if ( j != i )
- {
- contigs[i].father = j = y->number;
- contigs[i].brother = contigs[j].child;
- contigs[j].child = i;
- contigs[i].node[1] = y;
- break;
- }
- else
- {
- if ( segment[i].list->number == y->number )
- segment[i].list = y->next;
- else
- {
- for ( x = segment[i].list; x->next->number != y->number ; )
- x = x->next;
- x->next = y->next;
- }
- for ( x = segment[i].list; x != 0 && x->kind; x = x->next )
- ;
- if ( x == 0 )
- {
- segment[i].kind = 1;
- break;
- }
- }
- }
- edge = ( overptr * ) ckalloc( edge_num * sizeof(overptr) );
- for ( j = 0, i = 0; i < seg_num; i++ )
- if ( segment[i].kind )
- for ( node1 = segment[i].list; node1 != 0; node1 = node1->next )
- if ( segment[node1->number].kind )
- edge[j++] = node1;
- edge_num = j;
- for ( i = edge_num - 1; i > 0; i-- )
- {
- sorted = 1;
- for ( j = 0; j < i; j++ )
- if ( edge[j]->score < edge[j+1]->score )
- {
- node1 = edge[j];
- edge[j] = edge[j+1];
- edge[j+1] = node1;
- sorted = 0;
- }
- if ( sorted )
- break;
- }
- for ( k = 0; k < edge_num; k++ )
- {
- five = edge[k]->host;
- three = edge[k]->number;
- orienti = edge[k]->orienti;
- orientj = edge[k]->orientj;
- if ( contigs[five].isthree[orienti] &&
- contigs[three].isfive[orientj] && contigs[five].other != three )
- {
- contigs[five].isthree[orienti] = 0;
- contigs[three].isfive[orientj] = 0;
- contigs[five].next[orienti] = three;
- contigs[five].orient[orienti] = orientj;
- contigs[five].node[orienti] = edge[k];
- contigs[three].isthree[(j = 1 - orientj)] = 0;
- contigs[five].isfive[(i = 1 - orienti)] = 0;
- contigs[three].next[j] = five;
- contigs[three].orient[j] = i;
- contigs[three].node[j] = edge[k];
- i = contigs[three].other;
- j = contigs[five].other;
- contigs[i].other = j;
- contigs[j].other = i;
- }
- }
- }
-
- REPAIR()
- {
- int endi, endj, stari, starj; /* endpoint and startpoint */
-
- short orienti, orientj; /* orientation of segments */
- short isconi, isconj; /* containment condition */
- int score; /* the max score */
- int i, j, f, d, e; /* row and column indices */
- char *ckalloc(); /* space-allocating function */
- char *A, *B;
- int M, N;
- overptr node1;
- int piece_num; /* The number of pieces */
- int count, limit;
- int number;
- int hit;
-
- piece = ( struct VX * ) ckalloc( seg_num * sizeof(struct VX));
- for ( j = 0, i = 0; i < seg_num; i++ )
- if (segment[i].kind && (contigs[i].isfive[1] || contigs[i].isfive[0]))
- piece[j++].id = i;
- piece_num = j;
- for ( i = 0; i < piece_num; i++ )
- {
- piece[i].kind = 1;
- piece[i].list = 0;
- }
- limit = 2 * ( piece_num - 1 );
- hit = number = 0;
- for ( orienti = 0, d = 0; d < limit ; d++ )
- {
- i = piece[(e = d / 2)].id;
- orienti = 1 - orienti;
- A = orienti ? segment[i].seq : segment[i].rev;
- M = segment[i].length;
- for ( f = e+1; f < piece_num ; f++ )
- {
- j = piece[f].id;
- B = segment[j].seq;
- orientj = 1;
- N = segment[j].length;
- SCORE = 0;
- hit++;
- big_pass(A,B,M,N,orienti,orientj);
- if ( SCORE > CUTOFF )
- {
- score = SCORE;
- stari = ++STARI;
- starj = ++STARJ;
- endi = ENDI;
- endj = ENDJ;
- isconi = stari == 1 && endi == M;
- isconj = starj == 1 && endj == N;
- node1 = ( overptr ) ckalloc( (int ) sizeof(over));
- if ( isconi || isconj )
- node1->kind = 0; /* containment */
- else
- {
- node1->kind = 1;
- number++;
- } /* overlap */
- if ( endi == M && ! isconj ) /*i is 5'*/
- {
- node1->number = j;
- node1->host = i;
- node1->ind = f;
- node1->stari = stari;
- node1->endi = endi;
- node1->orienti = orienti;
- node1->starj = starj;
- node1->endj = endj;
- node1->orientj = orientj;
- }
- else /* j is 5' */
- {
- node1->number = i;
- node1->host = j;
- node1->ind = e;
- node1->stari = starj;
- node1->endi = endj;
- node1->orienti = orientj;
- node1->starj = stari;
- node1->endj = endi;
- node1->orientj = orienti;
- }
- node1->score = score;
- count = node1->number == i ? f : e;
- node1->next = piece[count].list;
- piece[count].list = node1;
- if ( ! node1->kind )
- piece[count].kind = 0;
- }
- }
- }
- REASSEM(piece_num, number);
- }
-
- /* Construct contigs */
- REASSEM(piece_num, number) int piece_num, number;
- {
- char *ckalloc(); /* space-allocating function */
- int i, j, k, d; /* index variables */
- overptr node1, x, y; /* temporary pointer */
- int five, three; /* indices of 5' and 3' segments */
- short orienti; /* orientation of 5' segment */
- short orientj; /* orientation of 3' segment */
- short sorted; /* boolean variable */
-
- for ( d = 0; d < piece_num; d++ )
- if ( ! piece[d].kind )
- for ( i = piece[d].id ; ; )
- {
- for ( y = piece[d].list; y->kind; y = y->next )
- ;
- for ( x = y->next; x != 0; x = x->next )
- if ( ! x->kind && x->score > y->score )
- y = x;
- for ( j = y->number; (k = contigs[j].father) != -1; j = k )
- ;
- if ( j != i && RECONCILE(y,&piece_num,&number) )
- {
- contigs[i].father = j = y->number;
- contigs[i].brother = contigs[j].child;
- contigs[j].child = i;
- contigs[i].node[1] = y;
- segment[i].kind = 0;
- break;
- }
- else
- {
- if ( piece[d].list->number == y->number )
- piece[d].list = y->next;
- else
- {
- for ( x = piece[d].list; x->next->number != y->number ; )
- x = x->next;
- x->next = y->next;
- }
- for ( x = piece[d].list; x != 0 && x->kind; x = x->next )
- ;
- if ( x == 0 )
- {
- piece[d].kind = 1;
- break;
- }
- }
- }
- if ( number > edge_num )
- edge = ( overptr * ) ckalloc( number * sizeof(overptr) );
- for ( j = 0, d = 0; d < piece_num; d++ )
- if ( piece[d].kind )
- for ( node1 = piece[d].list; node1 != 0; node1 = node1->next )
- if ( piece[node1->ind].kind )
- edge[j++] = node1;
- edge_num = j;
- for ( i = edge_num - 1; i > 0; i-- )
- {
- sorted = 1;
- for ( j = 0; j < i; j++ )
- if ( edge[j]->score < edge[j+1]->score )
- {
- node1 = edge[j];
- edge[j] = edge[j+1];
- edge[j+1] = node1;
- sorted = 0;
- }
- if ( sorted )
- break;
- }
- for ( k = 0; k < edge_num; k++ )
- {
- five = edge[k]->host;
- three = edge[k]->number;
- orienti = edge[k]->orienti;
- orientj = edge[k]->orientj;
- if ( contigs[five].isthree[orienti] &&
- contigs[three].isfive[orientj] && contigs[five].other != three )
- {
- contigs[five].isthree[orienti] = 0;
- contigs[three].isfive[orientj] = 0;
- contigs[five].next[orienti] = three;
- contigs[five].orient[orienti] = orientj;
- contigs[five].node[orienti] = edge[k];
- contigs[three].isthree[(j = 1 - orientj)] = 0;
- contigs[five].isfive[(i = 1 - orienti)] = 0;
- contigs[three].next[j] = five;
- contigs[three].orient[j] = i;
- contigs[three].node[j] = edge[k];
- i = contigs[three].other;
- j = contigs[five].other;
- contigs[i].other = j;
- contigs[j].other = i;
- }
- }
- }
-
- RECONCILE(y, pp,nn) overptr y;
- int *pp,*nn;
- {
- short orienti, orientj; /* orientation of segments */
- short orientk, orientd; /* orientation of segments */
- int i, j, k, d, f; /* row and column indices */
- char *ckalloc(); /* space-allocating function */
- char *A, *B;
- int M, N;
- overptr node1;
-
- k = y->host;
- d = y->number;
- orientk = y->orienti;
- orientd = y->orientj;
- if ( ! contigs[k].isthree[orientk] )
- {
- if ( ! piece[y->ind].kind ) return (0);
- if ( contigs[d].isthree[orientd] )
- {
- orienti = orientd;
- i = d;
- orientj = contigs[k].orient[orientk];
- j = contigs[k].next[orientk];
- }
- else
- return (0);
- }
- else
- if ( ! contigs[k].isfive[orientk] )
- {
- if ( ! piece[y->ind].kind ) return (0);
- if ( contigs[d].isfive[orientd] )
- {
- orienti = contigs[k].orient[1-orientk];
- orienti = 1 - orienti;
- i = contigs[k].next[1-orientk];
- orientj = orientd;
- j = d;
- }
- else
- return (0);
- }
- else
- return (0);
- A = orienti ? segment[i].seq : segment[i].rev;
- M = segment[i].length;
- B = orientj ? segment[j].seq : segment[j].rev;
- N = segment[j].length;
- SCORE = 0;
- big_pass(A,B,M,N,orienti,orientj);
- if ( SCORE > CUTOFF && ENDI - STARI > over_len && ENDI == M && STARJ == 0 )
- {
- node1 = ( overptr ) ckalloc( (int ) sizeof(over));
- node1->kind = 1;
- node1->host = i;
- node1->number = j;
- node1->stari = ++STARI;
- node1->endi = ENDI;
- node1->orienti = orienti;
- node1->starj = ++STARJ;
- node1->endj = ENDJ;
- node1->orientj = orientj;
- node1->score = SCORE;
- piece[*pp].kind = 1;
- if ( i == d )
- {
- node1->ind = *pp;
- node1->next = piece[y->ind].list;
- piece[y->ind].list = node1;
- piece[*pp].id = j;
- piece[*pp].list = 0;
- }
- else
- {
- node1->ind = y->ind;
- piece[*pp].list = node1;
- node1->next = 0;
- piece[*pp].id = i;
- }
- (*nn)++;
- (*pp)++;
- f = contigs[k].other;
- if ( ! contigs[k].isthree[orientk] )
- {
- contigs[j].isfive[orientj] = 1;
- contigs[j].isthree[1 - orientj] = 1;
- contigs[k].isthree[orientk] = 1;
- contigs[k].isfive[1 - orientk] = 1;
- contigs[f].other = j;
- contigs[j].other = f;
- }
- else
- {
- contigs[i].isthree[orienti] = 1;
- contigs[i].isfive[1 - orienti] = 1;
- contigs[k].isfive[orientk] = 1;
- contigs[k].isthree[1 - orientk] = 1;
- contigs[f].other = i;
- contigs[i].other = f;
- }
- contigs[k].other = k;
- return (1);
- }
- return (0);
- }
-
- /* Construct a tree of overlapping-containment segments */
- FORM_TREE()
- {
- register int i, j, k; /* index variables */
- char *ckalloc(); /* space-allocating function */
- overptr node1; /* temporary pointer */
- short orient; /* orientation of segment */
- int group; /* serial number of contigs */
- char *A, *B; /* pointers to segment sequences */
- int stari, endi, starj, endj;/* positions where alignment begins */
- int M, N; /* lengths of segment sequences */
- int count; /* temporary variables */
-
- mtree = ( struct TTREE * ) ckalloc( seg_num * sizeof(struct TTREE));
- for ( i = 0; i < seg_num; i++ )
- {
- mtree[i].head = 0;
- mtree[i].next = mtree[i].child = mtree[i].brother = -1;
- }
- for ( group = 0, i = 0; i < seg_num; i++ )
- if ( segment[i].kind && contigs[i].group < 0 &&
- ( contigs[i].isfive[1] || contigs[i].isfive[0] ) )
- {
- orient = contigs[i].isfive[1] ? 1 : 0;
- mtree[i].head = 1;
- for ( j = i; ; )
- {
- contigs[j].group = group;
- mtree[j].orient = orient;
- SORT(j, orient);
- if ( contigs[j].isthree[orient] )
- break;
- else
- {
- k = contigs[j].next[orient];
- node1 = contigs[j].node[orient];
- if ( j == node1->host )
- {
- stari = node1->stari;
- endi = node1->endi;
- starj = node1->starj;
- endj = node1->endj;
- A = node1->orienti ? segment[j].seq : segment[j].rev;
- B = node1->orientj ? segment[k].seq : segment[k].rev;
- }
- else
- {
- M = segment[j].length;
- stari = M + 1 - node1->endj;
- endi = M + 1 - node1->starj;
- N = segment[k].length;
- starj = N + 1 - node1->endi;
- endj = N + 1 - node1->stari;
- A = node1->orientj ? segment[j].rev : segment[j].seq;
- B = node1->orienti ? segment[k].rev : segment[k].seq;
- }
- M = endi - stari + 1;
- N = endj - starj + 1;
- sapp = S;
- last = 0;
- al_len = no_mat = no_mis = 0;
- (void) diff(&A[stari]-1, &B[starj]-1,M,N,q,q);
- count = ( (N = sapp - S) + 1 ) * sizeof(int );
- mtree[k].script = ( int * ) ckalloc( count );
- for ( M = 0; M < N; M++)
- mtree[k].script[M] = S[M];
- mtree[k].size = N;
- mtree[k].begin = stari;
- mtree[j].next = k;
- orient = contigs[j].orient[orient];
- j = k;
- }
- }
- group++;
- }
- }
-
- /* Sort the children of each node by the `begin' field */
- SORT(seg, ort) int seg;
- short ort;
- {
- register int i, j, k; /* index variables */
- char *ckalloc(); /* space-allocating function */
- overptr node1; /* temporary pointer */
- short orient; /* orientation of segment */
- char *A, *B; /* pointers to segment sequences */
- int stari, endi, starj, endj;/* positions where alignment begins */
- int M, N; /* lengths of segment sequences */
- int count; /* temporary variables */
-
- for ( j = contigs[seg].child; j != -1; j = contigs[j].brother )
- {
- node1 = contigs[j].node[1];
- if ( ort == node1->orientj )
- {
- stari = node1->starj;
- endi = node1->endj;
- starj = node1->stari;
- endj = node1->endi;
- A = node1->orientj ? segment[seg].seq : segment[seg].rev;
- B = node1->orienti ? segment[j].seq : segment[j].rev;
- orient = node1->orienti;
- }
- else
- {
- M = segment[seg].length;
- stari = M + 1 - node1->endj;
- endi = M + 1 - node1->starj;
- N = segment[j].length;
- starj = N + 1 - node1->endi;
- endj = N + 1 - node1->stari;
- A = node1->orientj ? segment[seg].rev : segment[seg].seq;
- B = node1->orienti ? segment[j].rev : segment[j].seq;
- orient = 1 - node1->orienti;
- }
- M = endi - stari + 1;
- N = endj - starj + 1;
- sapp = S;
- last = 0;
- al_len = no_mat = no_mis = 0;
- (void) diff(&A[stari]-1, &B[starj]-1,M,N,q,q);
- count = ( (M = sapp - S ) + 1 ) * sizeof(int );
- mtree[j].script = ( int * ) ckalloc( count );
- for ( k = 0; k < M; k++)
- mtree[j].script[k] = S[k];
- mtree[j].size = M;
- mtree[j].begin = stari;
- mtree[j].orient = orient;
- if ( mtree[seg].child == -1 )
- mtree[seg].child = j;
- else
- {
- i = mtree[seg].child;
- if ( mtree[i].begin >= stari )
- {
- mtree[j].brother = i;
- mtree[seg].child = j;
- }
- else
- {
- M = mtree[i].brother;
- for ( ; M != -1; i = M, M = mtree[M].brother )
- if ( mtree[M].begin >= stari ) break;
- mtree[j].brother = M;
- mtree[i].brother = j;
- }
- }
- SORT(j, orient);
- }
- }
-
-
- /* Display the alignments of segments */
- SHOW()
- {
- register int i, j, k; /* index variables */
- char *ckalloc(); /* space-allocating function */
- int n; /* number of working segments */
- int limit; /* number of slots in work */
- int col; /* number of output columns prepared */
- short done; /* tells if current group is done */
- rowptr root; /* pointer to root of op tree */
- int sym[6]; /* occurrance counts for six chars */
- char c; /* temp variable */
- rowptr t, w, yy; /* temp pointer */
- int x; /* temp variables */
- int group; /* Contigs number */
- char conlit[20], *a; /* String form of contig number */
- char *spt; /* pointer to the start of consensus */
-
- work = ( rowptr * ) ckalloc( seg_num * sizeof(rowptr));
- group = 0;
- yy = 0;
- for ( j = 0; j < 6; j++ )
- sym[j] = 0;
- n = limit = col = 0;
- for ( i = 0; i < seg_num; i++ )
- if ( mtree[i].head )
- {
- (void) sprintf(conlit, "\n>Contig-%d\n", group);
- for ( a = conlit; *a; )
- *allconpt++ = *a++;
- /* Mod by S.S.
- (void) printf("\n#Contig %d\n\n", group++);
- */
- group++;
- done = 0;
- ENTER(&limit, &n, i, col, yy);
- root = work[0];
- spt = allconpt;
- while ( ! done )
- {
- for ( j = 0; j < n; j++ ) /* get segments into work */
- {
- t = work[j];
- k = t->id;
- if ((x = mtree[k].next) != -1 && mtree[x].begin == t->loc)
- {
- ENTER(&limit, &n, x, col, t);
- mtree[k].next = -1;
- }
- for ( x = mtree[k].child; x != -1; x = mtree[x].brother )
- if ( mtree[x].begin == t->loc )
- {
- ENTER(&limit, &n, x, col, t);
- mtree[k].child = mtree[x].brother;
- }
- else
- break;
- }
- COLUMN(root); /* determine next column */
- root->c = root->kind;
- for ( t = head; t != 0; t = t->link )
- t->c = t->kind;
- for ( j = 0; j < n; j++ )
- {
- t = work[j];
- if ( t->done )
- *t->a++ = ' ';
- else
- {
- if ( t->c == 'L' )
- {
- if ( t->loc == 1 )
- t->offset = allconpt - spt;
- c = *t->a++ = t->seq[t->loc++];
- }
- else
- if ( t->loc > 1 )
- c = *t->a++ = '-';
- else
- c = *t->a++ = ' ';
- if ( c != ' ' )
- if ( c == '-' )
- sym[5] += 1;
- else
- sym[vert[c]] += 1;
- t->c = ' ';
- }
- }
- /* determine consensus char */
- k = sym[0] + sym[1] + sym[2] + sym[3] + sym[4];
- if ( k < sym[5] )
- *allconpt++ = '-';
- else
- if ( sym[0] == sym[1] && sym[1] == sym[2] &&
- sym[2] == sym[3] )
- *allconpt++ = 'N';
- else
- {
- k = sym[0];
- c = 'A';
- if ( k < sym[1] ) {
- k = sym[1];
- c = 'C';
- }
- if ( k < sym[2] ) {
- k = sym[2];
- c = 'G';
- }
- if ( k < sym[3] ) c = 'T';
- *allconpt++ = c;
- }
- for ( j = 0; j < 6; j++ )
- sym[j] = 0;
- for ( t = head; t != 0; t = t->link )
- {
- NEXTOP(t);
- if ( t->done ) /* delete it from op tree */
- {
- w = t->father;
- if ( w->child->id == t->id )
- w->child = t->brother;
- else
- {
- w = w->child;
- for ( ; w->brother->id != t->id; w = w->brother )
- ;
- w->brother = t->brother;
- }
- }
- }
- if ( root->loc > root->length ) /* check root node */
- {
- root->done = 1;
- if ( (w = root->child) != 0 )
- {
- w->father = 0;
- root = w;
- }
- else
- done = 1;
- }
- col++;
- if ( col == LINELEN || done ) /* output */
- {
- col = 0;
- for ( j = 0; j < n; j++ )
- {
- t = work[j];
- if ( t->done )
-
- /*
- Mod by S.S.
- { (void) printf("#");
- for ( a = t->name; *a; a++ )
- (void) printf("%c", *a);
- */
-
- { /* dgg output == fasta compatible */
- short ii, isrev;
- isrev= (t->name[strlen(t->name)] == '-');
-
- (void) fprintf(fout,">cap_");
- for ( a = t->name; *a; a++ ) (void) fprintf(fout,"%c", *a);
- (void) fprintf(fout,"\n");
-
- /* need to print offset (.) and reverse as needed */
- for ( ii = 0, k=0 ; ii < t->offset; ii++ ) {
- k++;
- (void) fprintf(fout,"%c", '.');
- if ( k == LINELEN ) {
- (void) fprintf(fout,"\n");
- k = 0;
- }
- }
-
- if (isrev) {
- for ( a = t->a-1 ; a != t->line-1; a-- )
- if ( *a != ' ' ) {
- k++;
- (void) fprintf(fout,"%c", nuccomp(*a));
- if ( k == LINELEN ) {
- (void) fprintf(fout,"\n");
- k = 0;
- }
- }
- }
- else {
- for ( /*k = 0,*/ a = t->line ; a != t->a; a++ )
- if ( *a != ' ' ) {
- k++;
- (void) fprintf(fout,"%c", *a);
- if ( k == LINELEN ) {
- (void) fprintf(fout,"\n");
- k = 0;
- }
- }
- }
- if (k) (void) fprintf(fout,"\n");
- }
-
- /***** s.s.
- {
- int jj;
- (void) printf("{\nname ");
- for(jj=0;jj<strlen(t->name)-1;jj++)
- (void) printf("%c", t->name[jj]);
- printf("\nstrandedness %c\n",
- t->name[strlen(t->name)] == '+'? '1':'2');
-
- printf("offset %d\ntype DNA\ngroup-ID %d\nsequence \"\n",
- t->offset,group);
- for ( k = 0, a = t->line ; a != t->a; a++ )
- if ( *a != ' ' )
- {
- k++;
- (void) printf("%c", *a);
- if ( k == LINELEN )
- {
- (void) printf("\n");
- k = 0;
- }
- }
-
- (void) printf("\"\n}\n");
- }
- ******/
- if ( t->linesize - (t->a - t->line) < LINELEN + 3 )
- ALOC_SEQ(t);
- }
- if ( !done )
- {
- for ( k = j = n - 1; j >= 0; j-- )
- if ( work[j]->done )
- {
- t = work[j];
- for ( x = j; x < k; x++ )
- work[x] = work[x+1];
- work[k--] = t;
- }
- n = k + 1;
- }
- else
- n = 0;
- }
- }
- }
-
- *allconpt = 0; /* dgg, term string */
- /* (void) fprintf(fout,"\n>Consensus\n"); -- dgg added back */
- for ( a = allconsen, k=0 ; *a; a++ ) {
- k++;
- (void) fprintf(fout,"%c", *a);
- if ( k == LINELEN ) {
- (void) fprintf(fout,"\n");
- k = 0;
- }
- }
- fprintf(fout,"\n\n");
- }
-
- /* allocate more space for output fragment */
- ALOC_SEQ(t) rowptr t;
- {
- char *start, *end, *p;
- t->linesize *= 2;
- start = t->line;
- end = t->a;
- t->line = ( char * ) ckalloc( t->linesize * sizeof(char));
- for ( t->a = t->line, p = start ; p != end; )
- *t->a++ = *p++;
- free(start);
- }
-
- /* enter a segment into working set */
- ENTER(b, d, id, pos, par) int *b, *d, id, pos;
- rowptr par;
- {
- int i;
- char *ckalloc(); /* space-allocating function */
- rowptr t;
-
- if ( *b <= *d )
- {
- work[*b] = ( rowptr ) ckalloc( (int ) sizeof(row));
- work[*b]->line = ( char * ) ckalloc( SEQLEN * sizeof(char));
- work[*b]->linesize = SEQLEN;
- *b += 1;
- }
- t = work[*d];
- *d += 1;
- t->a = t->line;
- for ( i = 0; i < pos; i++ )
- *t->a++ = ' ';
- t->c = ' ';
- t->seq = mtree[id].orient ? segment[id].seq : segment[id].rev;
- t->length = segment[id].length;
- t->id = id;
- if ( par != 0 )
- {
- t->s = mtree[id].script;
- t->size = mtree[id].size;
- }
- t->op = 0;
- for ( i = 1; i <= segment[id].len && i <= NAMELEN; i++ )
- t->name[i-1] = segment[id].name[i];
- if ( mtree[id].orient )
- t->name[i-1] = '+';
- else
- t->name[i-1] = '-';
- t->name[i] = '\0';
- t->done = 0;
- t->loc = 1;
- t->child = 0;
- t->father = par;
- if ( par != 0 )
- {
- t->brother = par->child;
- par->child = t;
- NEXTOP(t);
- }
- }
-
- /* get the next operation */
- NEXTOP(t) rowptr t;
- {
- if ( t->size || t->op )
- if ( t->op == 0 && *t->s == 0 )
- {
- t->op = *t->s++;
- t->size--;
- t->up = 'L';
- t->dw = 'L';
- }
- else
- {
- if ( t->op == 0 )
- {
- t->op = *t->s++;
- t->size--;
- }
- if ( t->op > 0 )
- {
- t->up = '-';
- t->dw = 'L';
- t->op--;
- }
- else
- {
- t->up = 'L';
- t->dw = '-';
- t->op++;
- }
- }
- else
- if ( t->loc > t->length )
- t->done = 1;
- }
-
- COLUMN(x) rowptr x;
- {
- rowptr y;
- rowptr start, end; /* first and last nodes for subtree */
-
- if ( x->child == 0 )
- {
- head = tail = 0;
- x->kind = 'L';
- }
- else
- {
- start = end = 0;
- x->kind = 'L';
- for ( y = x->child; y != 0; y = y->brother )
- {
- COLUMN(y);
- if ( x->kind == y->up )
- if ( y->kind == y->dw )
- {
- if ( head == 0 )
- {
- y->link = 0;
- head = tail = y;
- }
- else
- {
- y->link = head;
- head = y;
- }
- if ( end == 0 )
- start = head;
- else
- end->link = head;
- end = tail;
- }
- else
- if ( y->kind == '-' )
- {
- start = head;
- end = tail;
- x->kind = '-';
- }
- else
- {
- y->link = 0;
- y->kind = '-';
- if ( end == 0 )
- start = end = y;
- else
- {
- end->link = y;
- end = y;
- }
- }
- else
- if ( y->kind == y->dw )
- if ( x->kind == '-' )
- ;
- else
- {
- if ( head == 0 )
- {
- y->link = 0;
- head = tail = y;
- }
- else
- {
- y->link = head;
- head = y;
- }
- start = head;
- end = tail;
- x->kind = '-';
- }
- else
- if ( x->kind == '-' )
- if ( y->kind == '-' )
- {
- if ( end == 0 )
- {
- start = head;
- end = tail;
- }
- else
- if ( head == 0 )
- /* code folded from here */
- ;
- /* unfolding */
- else
- {
- /* code folded from here */
- end->link = head;
- end = tail;
- /* unfolding */
- }
- }
- else
- ;
- else
- {
- start = head;
- end = tail;
- x->kind = '-';
- }
- }
- head = start;
- tail = end;
- }
- }
-
- /* Display a summary of contigs */
- GRAPH()
- {
- int i, j, k; /* index variables */
- int group; /* serial number of contigs */
- char name[NAMELEN+2]; /* name of segment */
- char *t; /* temp var */
- int length; /* length of name */
-
- (void) printf("\nOVERLAPS CONTAINMENTS\n\n");
- group = 1;
- for ( i = 0; i < seg_num; i++ )
- if ( mtree[i].head )
- {
- (void) printf("******************* Contig %d ********************\n",
- group++ );
- for ( j = i; j != -1; j = mtree[j].next )
- {
- length = segment[j].len;
- t = segment[j].name + 1;
- for ( k = 0; k < length && k < NAMELEN; k++ )
- name[k] = *t++;
- if ( mtree[j].orient )
- name[k] = '+';
- else
- name[k] = '-';
- name[k+1] = '\0';
- (void) printf("%s\n", name);
- CONTAIN(mtree[j].child, name);
- }
- }
- }
-
- CONTAIN(id, f) int id;
- char *f;
- {
- int k; /* index variable */
- char name[NAMELEN+2]; /* name of segment */
- char *t; /* temp var */
- int length; /* length of name */
-
- if ( id != -1 )
- {
- length = segment[id].len;
- t = segment[id].name + 1;
- for ( k = 0; k < length && k < NAMELEN; k++ )
- name[k] = *t++;
- if ( mtree[id].orient )
- name[k] = '+';
- else
- name[k] = '-';
- name[k+1] = '\0';
- (void) printf(" %s is in %s\n", name,f);
- CONTAIN(mtree[id].child, name);
- CONTAIN(mtree[id].brother, f);
- }
- }
-
- big_pass(A,B,M,N,orienti,orientj) char A[],B[];
- int M,N;
- short orienti, orientj;
- {
- register int i, j; /* row and column indices */
- register int c; /* best score at current point */
- register int f; /* best score ending with insertion */
- register int d; /* best score ending with deletion */
- register int p; /* best score at (i-1, j-1) */
- register int ci; /* end-point associated with c */
-
- register int di; /* end-point associated with d */
- register int fi; /* end-point associated with f */
- register int pi; /* end-point associated with p */
- char *va; /* pointer to varray(A[i], B[j]) */
- int x1, x2; /* regions of A before x1 or after x2 are lightly penalized */
- int y1, y2; /* regions of B before y1 or after y2 are lightly penalized */
- short heavy; /* 1 = heavy penalty */
- int ex, gx; /* current gap penalty scores */
-
- /* determine x1, x2, y1, y2 */
- if ( POS5 >= POS3 )
- fatal("The value for POS5 must be less than the value for POS3");
- if ( orienti )
- {
- x1 = POS5 >= M ? 1 : POS5;
- x2 = POS3 >= M ? M : POS3;
- }
- else
- {
- x1 = POS3 >= M ? 1 : M - POS3 + 1;
- x2 = POS5 >= M ? M : M - POS5 + 1;
- }
- if ( orientj )
- {
- y1 = POS5 >= N ? 1 : POS5;
- y2 = POS3 >= N ? N : POS3;
- }
- else
- {
- y1 = POS3 >= N ? 1 : N - POS3 + 1;
- y2 = POS5 >= N ? N : N - POS5 + 1;
- }
- if ( x1 + 1 <= x2 ) x1++;
- if ( y1 + 1 <= y2 ) y1++;
- heavy = 0;
-
- /* Compute the matrix.
- CC : the scores of the current row
- RR : the starting point that leads to score CC
- DD : the scores of the current row, ending with deletion
- SS : the starting point that leads to score DD */
- /* Initialize the 0 th row */
- for ( j = 1; j <= N ; j++ )
- {
- CC[j] = 0;
- DD[j] = - (q);
- RR[j] = SS[j] = -j;
- }
- for ( i = 1; i <= M; i++)
- {
- if ( i == x1 ) heavy = 1 - heavy;
- if ( i == x2 ) heavy = 1 - heavy;
- ex = r1;
- gx = qr1;
- va = v1[A[i]];
- c = 0; /* Initialize column 0 */
- f = - (q);
- ci = fi = i;
- p = 0;
- pi = i - 1;
- for ( j = 1 ; j <= N ; j++ )
- {
- if ( j == y1 )
- {
- if ( heavy )
- {
- ex = r;
- gx = qr;
- /*
- S.S.
- va = varray[A[i]];
- */
- va = varray[A[i]];
- }
- }
- if ( j == y2 )
- {
- if ( heavy )
- {
- ex = r1;
- gx = qr1;
- va = v1[A[i]];
- }
- }
- if ( ( f = f - ex ) < ( c = c - gx ) )
- {
- f = c;
- fi = ci;
- }
- di = SS[j];
- if ( ( d = DD[j] - ex ) < ( c = CC[j] - gx ) )
- {
- d = c;
- di = RR[j];
- }
- c = p+va[B[j]]; /* diagonal */
- ci = pi;
- if ( c < d )
- {
- c = d;
- ci = di;
- }
- if ( c < f )
- {
- c = f;
- ci = fi;
- }
- p = CC[j];
- CC[j] = c;
- pi = RR[j];
- RR[j] = ci;
- DD[j] = d;
- SS[j] = di;
- if ( ( j == N || i == M ) && c > SCORE )
- {
- SCORE = c;
- ENDI = i;
- ENDJ = j;
- STARI = ci;
- }
- }
- }
- if ( SCORE )
- if ( STARI < 0 )
- {
- STARJ = - STARI;
- STARI = 0;
- }
- else
- STARJ = 0;
- }
-
- /* diff(A,B,M,N,tb,te) returns the score of an optimum conversion between
- A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
- and appends such a conversion to the current script. */
-
- int diff(A,B,M,N,tb,te) char *A, *B;
- int M, N;
- int tb, te;
-
- {
- int midi, midj, type; /* Midpoint, type, and cost */
- int midc;
-
- {
- register int i, j;
- register int c, e, d, s;
- int t;
- char *va;
- char *ckalloc();
-
- /* Boundary cases: M <= 1 or N == 0 */
-
- if (N <= 0)
- {
- if (M > 0) DEL(M)
- return - gap(M);
- }
- if (M <= 1)
- {
- if (M <= 0)
- {
- INS(N);
- return - gap(N);
- }
- if (tb > te) tb = te;
- midc = - (tb + r + gap(N) );
- midj = 0;
- va = varray[A[1]];
- for (j = 1; j <= N; j++)
- {
- c = va[B[j]] - ( gap(j-1) + gap(N-j) );
- if (c > midc)
- {
- midc = c;
- midj = j;
- }
- }
- if (midj == 0)
- {
- INS(N) DEL(1) }
- else
- {
- if (midj > 1) INS(midj-1)
- REP
- if ( (A[1]|32) == (B[midj]|32) )
- no_mat += 1;
- else
- no_mis += 1;
- if (midj < N) INS(N-midj)
- }
- return midc;
- }
-
- /* Divide: Find optimum midpoint (midi,midj) of cost midc */
-
- midi = M/2; /* Forward phase: */
- CC[0] = 0; /* Compute C(M/2,k) & D(M/2,k) for all k */
- t = -q;
- for (j = 1; j <= N; j++)
- {
- CC[j] = t = t-r;
- DD[j] = t-q;
- }
- t = -tb;
- for (i = 1; i <= midi; i++)
- {
- s = CC[0];
- CC[0] = c = t = t-r;
- e = t-q;
- va = varray[A[i]];
- for (j = 1; j <= N; j++)
- {
- if ((c = c - qr) > (e = e - r)) e = c;
- if ((c = CC[j] - qr) > (d = DD[j] - r)) d = c;
- c = s+va[B[j]];
- if (c < d) c = d;
- if (c < e) c = e;
- s = CC[j];
- CC[j] = c;
- DD[j] = d;
- }
- }
- DD[0] = CC[0];
-
- RR[N] = 0; /* Reverse phase: */
- t = -q; /* Compute R(M/2,k) & S(M/2,k) for all k */
- for (j = N-1; j >= 0; j--)
- {
- RR[j] = t = t-r;
- SS[j] = t-q;
- }
- t = -te;
- for (i = M-1; i >= midi; i--)
- {
- s = RR[N];
- RR[N] = c = t = t-r;
- e = t-q;
- va = varray[A[i+1]];
- for (j = N-1; j >= 0; j--)
- {
- if ((c = c - qr) > (e = e - r)) e = c;
- if ((c = RR[j] - qr) > (d = SS[j] - r)) d = c;
- c = s+va[B[j+1]];
- if (c < d) c = d;
- if (c < e) c = e;
- s = RR[j];
- RR[j] = c;
- SS[j] = d;
- }
- }
- SS[N] = RR[N];
-
- midc = CC[0]+RR[0]; /* Find optimal midpoint */
- midj = 0;
- type = 1;
- for (j = 0; j <= N; j++)
- if ((c = CC[j] + RR[j]) >= midc)
- if (c > midc || CC[j] != DD[j] && RR[j] == SS[j])
- {
- midc = c;
- midj = j;
- }
- for (j = N; j >= 0; j--)
- if ((c = DD[j] + SS[j] + q) > midc)
- {
- midc = c;
- midj = j;
- type = 2;
- }
- }
-
- /* Conquer: recursively around midpoint */
-
- if (type == 1)
- {
- (void) diff(A,B,midi,midj,tb,q);
- (void) diff(A+midi,B+midj,M-midi,N-midj,q,te);
- }
- else
- {
- (void) diff(A,B,midi-1,midj,tb,zero);
- DEL(2);
- (void) diff(A+midi+1,B+midj,M-midi-1,N-midj,zero,te);
- }
- return midc;
- }
-
- /* lib.c - library of C procedures. */
-
- /* fatal - print message and die */
- fatal(msg)
- char *msg;
- {
- (void) fprintf(stderr, "%s\n", msg);
- exit(1);
- }
-
- /* fatalf - format message, print it, and die */
- fatalf(msg, val)
- char *msg, *val;
- {
- (void) fprintf(stderr, msg, val);
- (void) putc('\n', stderr);
- exit(1);
- }
-
- /* ckopen - open file; check for success */
- FILE *ckopen(name, mode)
- char *name, *mode;
- {
- FILE *fopen(), *fp;
-
- if ((fp = fopen(name, mode)) == NULL)
- fatalf("Cannot open %s.", name);
- return(fp);
- }
-
- /* ckalloc - allocate space; check for success */
- char *ckalloc(amount)
- int amount;
- {
- char *p;
-
- if ((p = (char*) malloc( (unsigned) amount)) == NULL)
- fatal("Ran out of memory.");
- return(p);
- }
-
-